knitr::opts_chunk$set(fig.align="center")
library(rstanarm)
library(tidyverse)
library(tidybayes)
library(modelr)
library(ggplot2)
library(magrittr)
library(emmeans)
library(bayesplot)
library(brms)
library(gganimate)
theme_set(theme_light())
# Task labels, and the seed that is passed to every brm() fit and draw call below.
task_list <- c("3. Prediction", "4. Exploration")
seed <- 12
Read in and format data
# Read the CSV and convert every categorical predictor to a factor in one pass.
interacted_var_sets_data <-
  read.csv("split_by_participant_groups/num_of_interacted_variable_set.csv") %>%
  mutate(across(c(dataset, oracle, search, task, participant_group), as.factor))
Train model
# Mean and sd of the normal prior on the intercept; stanvars makes both values
# visible to the Stan program so the prior can refer to them by name.
prior_mean <- 35.24
prior_sd <- 25.33
stanvars <- stanvar(prior_mean, name = "prior_mean") +
  stanvar(prior_sd, name = "prior_sd")

# Mixed model with an oracle x search interaction, additive dataset / task /
# participant-group effects and a random intercept per participant.
# `file` points at the saved fit under models/.
model_interacted_var_sets <- brm(
  formula = num_interacted_variable_set ~
    oracle * search + dataset + task + participant_group + (1 | participant_id),
  data = interacted_var_sets_data,
  prior = prior(normal(prior_mean, prior_sd), class = Intercept),
  stanvars = stanvars,
  chains = 2,
  cores = 2,
  iter = 2500,
  warmup = 1000,
  seed = seed,
  file = "models/interacted_var_sets_group"
)
Plot
# Default plot of the fitted model, used as a visual check of the sampled parameters.
plot(model_interacted_var_sets)
Summary
# Print the model summary; the console output follows in the `##` lines.
summary(model_interacted_var_sets)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: num_interacted_variable_set ~ oracle * search + dataset + task + participant_group + (1 | participant_id)
## Data: interacted_var_sets_data (Number of observations: 132)
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~participant_id (Number of levels: 66)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 6.36 1.27 3.81 8.84 1.00 950 1347
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 20.50 2.91 14.91 26.18 1.00 2087
## oracledziban 0.67 3.00 -5.21 6.83 1.00 1655
## searchdfs -1.05 3.03 -6.94 4.90 1.00 1791
## datasetmovies 3.57 2.12 -0.60 7.56 1.00 2204
## task4.Exploration 1.90 1.48 -0.99 4.77 1.00 5462
## participant_groupstudent -0.21 2.23 -4.54 4.35 1.00 2035
## oracledziban:searchdfs 1.07 4.31 -7.37 9.68 1.00 1732
## Tail_ESS
## Intercept 2560
## oracledziban 2373
## searchdfs 2151
## datasetmovies 2412
## task4.Exploration 2195
## participant_groupstudent 2368
## oracledziban:searchdfs 1922
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 8.29 0.75 6.94 9.91 1.00 1127 1448
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Pairwise plots of the main population-level coefficients
# (the oracle:search interaction term is not included).
pairs(
  model_interacted_var_sets,
  pars = paste0(
    "b_",
    c(
      "Intercept", "datasetmovies", "oracledziban",
      "searchdfs", "task4.Exploration", "participant_groupstudent"
    )
  ),
  fixed = TRUE
)
# Fitted draws for every observed row; re_formula = NA leaves the
# participant-level random intercept out of the fitted values.
draw_data_interacted_var_sets <- interacted_var_sets_data %>%
  add_fitted_draws(model_interacted_var_sets, seed = seed, re_formula = NA)
# One facet label per oracle/search combination.
draw_data_interacted_var_sets$condition <- paste(
  draw_data_interacted_var_sets$oracle,
  draw_data_interacted_var_sets$search
)
# alpha is a constant, so it is set on the layer. Inside aes() it was treated
# as a data mapping, which added a meaningless legend entry.
plot_interacted_var_sets <- draw_data_interacted_var_sets %>%
  ggplot(aes(x = dataset, y = .value, fill = participant_group)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ condition)
plot_interacted_var_sets
# `filename` is spelled out instead of relying on partial matching of `file`.
# NOTE(review): "pariticpant_groups" looks misspelled but is kept because the
# existing directory layout evidently uses it.
ggsave(
  filename = "interacted_var_sets_split_group.png",
  plot = plot_interacted_var_sets,
  path = "../plots/posterior_draws/pariticpant_groups/num_interacted_variable_set"
)
## Saving 7 x 5 in image
# Mean plus 95% and 50% quantile intervals of the fitted draws for each
# search x oracle x task x participant-group cell.
fit_info_interacted_var_sets <- draw_data_interacted_var_sets %>%
  group_by(search, oracle, task, participant_group) %>%
  mean_qi(.value, .width = c(.95, .5))
fit_info_interacted_var_sets
## # A tibble: 32 x 10
## # Groups: search, oracle, task [8]
## search oracle task participant_gro… .value .lower .upper .width .point
## <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 bfs compa… 3. P… professional 22.3 15.7 28.9 0.95 mean
## 2 bfs compa… 3. P… student 22.1 15.9 28.4 0.95 mean
## 3 bfs compa… 4. E… professional 24.2 17.7 30.9 0.95 mean
## 4 bfs compa… 4. E… student 24.0 17.8 30.2 0.95 mean
## 5 bfs dziban 3. P… professional 23.2 16.5 29.5 0.95 mean
## 6 bfs dziban 3. P… student 22.7 16.6 29.0 0.95 mean
## 7 bfs dziban 4. E… professional 25.1 18.3 31.4 0.95 mean
## 8 bfs dziban 4. E… student 24.6 18.5 30.8 0.95 mean
## 9 dfs compa… 3. P… professional 21.5 14.9 27.8 0.95 mean
## 10 dfs compa… 3. P… student 21.0 14.8 27.3 0.95 mean
## # … with 22 more rows, and 1 more variable: .interval <chr>
# Save the interval summary as CSV.
# NOTE(review): unlike the plot above, this path has no participant-group
# subfolder; confirm it does not overwrite output from another analysis.
write.csv(
  fit_info_interacted_var_sets,
  file = "../plot_data/posterior_draws/num_interacted_variable_set/interacted_var_sets.csv",
  row.names = FALSE
)
# Posterior predictive draws for every observed row (re_formula = NA, as for
# the fitted draws).
predictive_data_interacted_var_sets <- add_predicted_draws(
  interacted_var_sets_data,
  model_interacted_var_sets,
  seed = seed,
  re_formula = NA
)
Difference in search
# Average the predictive draws per cell and draw (pooling over oracle), then
# contrast the search levels draw by draw.
diff_in_search_prediction <- predictive_data_interacted_var_sets %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = search) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the search contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_search_prediction_plot <- diff_in_search_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in interacted_var_sets (",
      as.character(diff_in_search_prediction$search[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_search_prediction$task)))
diff_in_search_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "search_differences.png",
  plot = diff_in_search_prediction_plot,
  path = "../plots/comparisons/num_interacted_variable_set"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_search_prediction_plot_split_by_dataset <- diff_in_search_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_search_prediction_plot_split_by_dataset
ggsave(
  filename = "search_differences_split_by_dataset.png",
  plot = diff_in_search_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_interacted_variable_set"
)
## Saving 7 x 5 in image
Difference in oracle
# Average the predictive draws per cell and draw (pooling over search), then
# contrast the oracle levels draw by draw.
diff_in_oracle_prediction <- predictive_data_interacted_var_sets %>%
  group_by(oracle, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = oracle) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'oracle', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the oracle contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_oracle_prediction_plot <- diff_in_oracle_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in interacted_var_sets (",
      as.character(diff_in_oracle_prediction$oracle[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_oracle_prediction$task)))
diff_in_oracle_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "oracle_differences.png",
  plot = diff_in_oracle_prediction_plot,
  path = "../plots/comparisons/num_interacted_variable_set"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_oracle_prediction_plot_split_by_dataset <- diff_in_oracle_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_oracle_prediction_plot_split_by_dataset
ggsave(
  filename = "oracle_differences_split_by_dataset.png",
  plot = diff_in_oracle_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_interacted_variable_set"
)
## Saving 7 x 5 in image
Difference in groups
# Average the predictive draws per cell and draw (pooling over oracle), then
# contrast the participant groups draw by draw.
# NOTE(review): `search` stays in the grouping, so the group contrast is
# computed within each search level; confirm this is intended rather than
# carried over from the search contrast.
diff_in_group_prediction <- predictive_data_interacted_var_sets %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = participant_group) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the participant-group contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_group_prediction_plot <- diff_in_group_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in interacted_var_sets (",
      as.character(diff_in_group_prediction$participant_group[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_group_prediction$task)))
diff_in_group_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "group_differences.png",
  plot = diff_in_group_prediction_plot,
  path = "../plots/comparisons/pariticpant_groups/num_interacted_variable_set"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_group_prediction_plot_split_by_dataset <- diff_in_group_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_group_prediction_plot_split_by_dataset
ggsave(
  filename = "group_differences_split_by_dataset.png",
  plot = diff_in_group_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/pariticpant_groups/num_interacted_variable_set"
)
## Saving 7 x 5 in image
Read in and format data
# Read the CSV and convert every categorical predictor to a factor in one pass.
interacted_visual_design_data <-
  read.csv("split_by_participant_groups/num_of_interacted_visual_design.csv") %>%
  mutate(across(c(dataset, oracle, search, task, participant_group), as.factor))
Train model
# Mean and sd of the normal prior on the intercept; stanvars makes both values
# visible to the Stan program so the prior can refer to them by name.
prior_mean <- 35.24
prior_sd <- 25.33
stanvars <- stanvar(prior_mean, name = "prior_mean") +
  stanvar(prior_sd, name = "prior_sd")

# Mixed model with an oracle x search interaction, additive dataset / task /
# participant-group effects and a random intercept per participant.
# `file` points at the saved fit under models/.
model_interacted_visual_design <- brm(
  formula = num_interacted_visual_design ~
    oracle * search + dataset + task + participant_group + (1 | participant_id),
  data = interacted_visual_design_data,
  prior = prior(normal(prior_mean, prior_sd), class = Intercept),
  stanvars = stanvars,
  chains = 2,
  cores = 2,
  iter = 2500,
  warmup = 1000,
  seed = seed,
  file = "models/interacted_visual_design_group"
)
Plot
# Default plot of the fitted model, used as a visual check of the sampled parameters.
plot(model_interacted_visual_design)
Summary
# Print the model summary; the console output follows in the `##` lines.
summary(model_interacted_visual_design)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: num_interacted_visual_design ~ oracle * search + dataset + task + participant_group + (1 | participant_id)
## Data: interacted_visual_design_data (Number of observations: 132)
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~participant_id (Number of levels: 66)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 7.57 1.32 4.91 10.22 1.00 757 812
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 21.52 3.29 15.14 28.06 1.00 1371
## oracledziban 1.58 3.45 -5.30 8.60 1.00 1140
## searchdfs -2.06 3.45 -8.84 4.96 1.00 1159
## datasetmovies 3.91 2.42 -0.85 8.65 1.00 1383
## task4.Exploration 1.70 1.52 -1.39 4.74 1.00 5023
## participant_groupstudent -0.36 2.50 -5.07 4.63 1.00 1371
## oracledziban:searchdfs 1.46 4.86 -7.98 11.04 1.00 1098
## Tail_ESS
## Intercept 1820
## oracledziban 1612
## searchdfs 1830
## datasetmovies 1719
## task4.Exploration 1731
## participant_groupstudent 1872
## oracledziban:searchdfs 1695
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 8.55 0.81 7.19 10.35 1.00 922 1462
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Pairwise plots of the main population-level coefficients
# (the oracle:search interaction term is not included).
pairs(
  model_interacted_visual_design,
  pars = paste0(
    "b_",
    c(
      "Intercept", "datasetmovies", "oracledziban",
      "searchdfs", "task4.Exploration", "participant_groupstudent"
    )
  ),
  fixed = TRUE
)
# Fitted draws for every observed row; re_formula = NA leaves the
# participant-level random intercept out of the fitted values.
draw_data_interacted_visual_design <- interacted_visual_design_data %>%
  add_fitted_draws(model_interacted_visual_design, seed = seed, re_formula = NA)
# One facet label per oracle/search combination.
draw_data_interacted_visual_design$condition <- paste(
  draw_data_interacted_visual_design$oracle,
  draw_data_interacted_visual_design$search
)
# alpha is a constant, so it is set on the layer. Inside aes() it was treated
# as a data mapping, which added a meaningless legend entry.
plot_interacted_visual_design <- draw_data_interacted_visual_design %>%
  ggplot(aes(x = dataset, y = .value, fill = participant_group)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ condition)
plot_interacted_visual_design
# `filename` is spelled out instead of relying on partial matching of `file`.
# NOTE(review): "pariticpant_groups" looks misspelled but is kept because the
# existing directory layout evidently uses it.
ggsave(
  filename = "interacted_visual_design_split_group.png",
  plot = plot_interacted_visual_design,
  path = "../plots/posterior_draws/pariticpant_groups/num_interacted_visual_design"
)
## Saving 7 x 5 in image
# Mean plus 95% and 50% quantile intervals of the fitted draws for each
# search x oracle x task x participant-group cell.
fit_info_interacted_visual_design <- draw_data_interacted_visual_design %>%
  group_by(search, oracle, task, participant_group) %>%
  mean_qi(.value, .width = c(.95, .5))
fit_info_interacted_visual_design
## # A tibble: 32 x 10
## # Groups: search, oracle, task [8]
## search oracle task participant_gro… .value .lower .upper .width .point
## <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 bfs compa… 3. P… professional 23.5 16.0 30.9 0.95 mean
## 2 bfs compa… 3. P… student 23.1 16.2 30.2 0.95 mean
## 3 bfs compa… 4. E… professional 25.2 17.7 32.6 0.95 mean
## 4 bfs compa… 4. E… student 24.8 17.8 31.8 0.95 mean
## 5 bfs dziban 3. P… professional 25.3 17.9 32.5 0.95 mean
## 6 bfs dziban 3. P… student 24.7 17.9 31.7 0.95 mean
## 7 bfs dziban 4. E… professional 27.0 19.8 34.0 0.95 mean
## 8 bfs dziban 4. E… student 26.4 19.5 33.3 0.95 mean
## 9 dfs compa… 3. P… professional 21.7 14.3 28.6 0.95 mean
## 10 dfs compa… 3. P… student 21.1 14.2 28.1 0.95 mean
## # … with 22 more rows, and 1 more variable: .interval <chr>
# Save the interval summary as CSV.
# NOTE(review): unlike the plot above, this path has no participant-group
# subfolder; confirm it does not overwrite output from another analysis.
write.csv(
  fit_info_interacted_visual_design,
  file = "../plot_data/posterior_draws/num_interacted_visual_design/interacted_visual_design.csv",
  row.names = FALSE
)
# Posterior predictive draws for every observed row (re_formula = NA, as for
# the fitted draws).
predictive_data_interacted_visual_design <- add_predicted_draws(
  interacted_visual_design_data,
  model_interacted_visual_design,
  seed = seed,
  re_formula = NA
)
Difference in search
# Average the predictive draws per cell and draw (pooling over oracle), then
# contrast the search levels draw by draw.
diff_in_search_prediction <- predictive_data_interacted_visual_design %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = search) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the search contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_search_prediction_plot <- diff_in_search_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in interacted_visual_design (",
      as.character(diff_in_search_prediction$search[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_search_prediction$task)))
diff_in_search_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "search_differences.png",
  plot = diff_in_search_prediction_plot,
  path = "../plots/comparisons/num_interacted_visual_design"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_search_prediction_plot_split_by_dataset <- diff_in_search_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_search_prediction_plot_split_by_dataset
ggsave(
  filename = "search_differences_split_by_dataset.png",
  plot = diff_in_search_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_interacted_visual_design"
)
## Saving 7 x 5 in image
Difference in oracle
# Average the predictive draws per cell and draw (pooling over search), then
# contrast the oracle levels draw by draw.
diff_in_oracle_prediction <- predictive_data_interacted_visual_design %>%
  group_by(oracle, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = oracle) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'oracle', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the oracle contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_oracle_prediction_plot <- diff_in_oracle_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in interacted_visual_design (",
      as.character(diff_in_oracle_prediction$oracle[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_oracle_prediction$task)))
diff_in_oracle_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "oracle_differences.png",
  plot = diff_in_oracle_prediction_plot,
  path = "../plots/comparisons/num_interacted_visual_design"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_oracle_prediction_plot_split_by_dataset <- diff_in_oracle_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_oracle_prediction_plot_split_by_dataset
ggsave(
  filename = "oracle_differences_split_by_dataset.png",
  plot = diff_in_oracle_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_interacted_visual_design"
)
## Saving 7 x 5 in image
Difference in groups
# Average the predictive draws per cell and draw (pooling over oracle), then
# contrast the participant groups draw by draw.
# NOTE(review): `search` stays in the grouping, so the group contrast is
# computed within each search level; confirm this is intended rather than
# carried over from the search contrast.
diff_in_group_prediction <- predictive_data_interacted_visual_design %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = participant_group) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the participant-group contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_group_prediction_plot <- diff_in_group_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in interacted_visual_design (",
      as.character(diff_in_group_prediction$participant_group[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_group_prediction$task)))
diff_in_group_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "group_differences.png",
  plot = diff_in_group_prediction_plot,
  path = "../plots/comparisons/pariticpant_groups/num_interacted_visual_design"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_group_prediction_plot_split_by_dataset <- diff_in_group_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_group_prediction_plot_split_by_dataset
ggsave(
  filename = "group_differences_split_by_dataset.png",
  plot = diff_in_group_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/pariticpant_groups/num_interacted_visual_design"
)
## Saving 7 x 5 in image
Read in and format data
# Read the CSV and convert every categorical predictor to a factor in one pass.
exposed_variable_set_data <-
  read.csv("split_by_participant_groups/num_of_exposed_variable_set.csv") %>%
  mutate(across(c(dataset, oracle, search, task, participant_group), as.factor))
Train model
# Mean and sd of the normal prior on the intercept; stanvars makes both values
# visible to the Stan program so the prior can refer to them by name.
prior_mean <- 35.24
prior_sd <- 25.33
stanvars <- stanvar(prior_mean, name = "prior_mean") +
  stanvar(prior_sd, name = "prior_sd")

# Mixed model with an oracle x search interaction, additive dataset / task /
# participant-group effects and a random intercept per participant.
# `file` points at the saved fit under models/.
model_exposed_variable_set <- brm(
  formula = num_exposed_variable_set ~
    oracle * search + dataset + task + participant_group + (1 | participant_id),
  data = exposed_variable_set_data,
  prior = prior(normal(prior_mean, prior_sd), class = Intercept),
  stanvars = stanvars,
  chains = 2,
  cores = 2,
  iter = 2500,
  warmup = 1000,
  seed = seed,
  file = "models/exposed_variable_set_group"
)
Plot
# Default plot of the fitted model, used as a visual check of the sampled parameters.
plot(model_exposed_variable_set)
Summary
# Print the model summary; the console output follows in the `##` lines.
summary(model_exposed_variable_set)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: num_exposed_variable_set ~ oracle * search + dataset + task + participant_group + (1 | participant_id)
## Data: exposed_variable_set_data (Number of observations: 132)
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~participant_id (Number of levels: 66)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 10.58 4.40 1.33 18.32 1.01 437 862
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 58.48 6.94 44.94 71.85 1.00 1767
## oracledziban 0.85 7.06 -13.01 14.65 1.00 1290
## searchdfs -24.91 7.30 -39.44 -11.13 1.00 1304
## datasetmovies 13.63 4.90 4.24 23.51 1.00 2733
## task4.Exploration 13.01 4.22 4.85 21.19 1.00 3203
## participant_groupstudent 5.51 5.09 -4.78 15.79 1.00 1937
## oracledziban:searchdfs 22.51 10.16 3.21 43.02 1.00 1202
## Tail_ESS
## Intercept 1508
## oracledziban 1425
## searchdfs 1192
## datasetmovies 2058
## task4.Exploration 1976
## participant_groupstudent 1809
## oracledziban:searchdfs 1805
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 24.21 2.07 20.33 28.42 1.01 742 911
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Pairwise plots of the main population-level coefficients
# (the oracle:search interaction term is not included).
pairs(
  model_exposed_variable_set,
  pars = paste0(
    "b_",
    c(
      "Intercept", "datasetmovies", "oracledziban",
      "searchdfs", "task4.Exploration", "participant_groupstudent"
    )
  ),
  fixed = TRUE
)
# Fitted draws for every observed row; re_formula = NA leaves the
# participant-level random intercept out of the fitted values.
draw_data_exposed_variable_set <- exposed_variable_set_data %>%
  add_fitted_draws(model_exposed_variable_set, seed = seed, re_formula = NA)
# One facet label per oracle/search combination.
draw_data_exposed_variable_set$condition <- paste(
  draw_data_exposed_variable_set$oracle,
  draw_data_exposed_variable_set$search
)
# alpha is a constant, so it is set on the layer. Inside aes() it was treated
# as a data mapping, which added a meaningless legend entry.
plot_exposed_variable_set <- draw_data_exposed_variable_set %>%
  ggplot(aes(x = dataset, y = .value, fill = participant_group)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ condition)
plot_exposed_variable_set
# `filename` is spelled out instead of relying on partial matching of `file`.
# NOTE(review): "pariticpant_groups" looks misspelled but is kept because the
# existing directory layout evidently uses it.
ggsave(
  filename = "exposed_variable_set_split_group.png",
  plot = plot_exposed_variable_set,
  path = "../plots/posterior_draws/pariticpant_groups/num_exposed_variable_set"
)
## Saving 7 x 5 in image
# Mean plus 95% and 50% quantile intervals of the fitted draws for each
# search x oracle x task x participant-group cell.
fit_info_exposed_variable_set <- draw_data_exposed_variable_set %>%
  group_by(search, oracle, task, participant_group) %>%
  mean_qi(.value, .width = c(.95, .5))
fit_info_exposed_variable_set
## # A tibble: 32 x 10
## # Groups: search, oracle, task [8]
## search oracle task participant_gro… .value .lower .upper .width .point
## <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 bfs compa… 3. P… professional 65.3 46.9 83.2 0.95 mean
## 2 bfs compa… 3. P… student 70.8 53.6 88.6 0.95 mean
## 3 bfs compa… 4. E… professional 78.3 60.1 96.6 0.95 mean
## 4 bfs compa… 4. E… student 83.8 66.2 102. 0.95 mean
## 5 bfs dziban 3. P… professional 67.1 48.6 84.1 0.95 mean
## 6 bfs dziban 3. P… student 71.7 54.3 89.0 0.95 mean
## 7 bfs dziban 4. E… professional 80.1 61.6 97.3 0.95 mean
## 8 bfs dziban 4. E… student 84.7 67.2 102. 0.95 mean
## 9 dfs compa… 3. P… professional 41.4 23.0 58.0 0.95 mean
## 10 dfs compa… 3. P… student 45.9 28.8 63.0 0.95 mean
## # … with 22 more rows, and 1 more variable: .interval <chr>
# Save the interval summary as CSV.
# NOTE(review): unlike the plot above, this path has no participant-group
# subfolder; confirm it does not overwrite output from another analysis.
write.csv(
  fit_info_exposed_variable_set,
  file = "../plot_data/posterior_draws/num_exposed_variable_set/exposed_variable_set.csv",
  row.names = FALSE
)
# Posterior predictive draws for every observed row (re_formula = NA, as for
# the fitted draws).
predictive_data_exposed_variable_set <- add_predicted_draws(
  exposed_variable_set_data,
  model_exposed_variable_set,
  seed = seed,
  re_formula = NA
)
Difference in search
# Average the predictive draws per cell and draw (pooling over oracle), then
# contrast the search levels draw by draw.
diff_in_search_prediction <- predictive_data_exposed_variable_set %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = search) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the search contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_search_prediction_plot <- diff_in_search_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_variable_set (",
      as.character(diff_in_search_prediction$search[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_search_prediction$task)))
diff_in_search_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "search_differences.png",
  plot = diff_in_search_prediction_plot,
  path = "../plots/comparisons/num_exposed_variable_set"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_search_prediction_plot_split_by_dataset <- diff_in_search_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_search_prediction_plot_split_by_dataset
ggsave(
  filename = "search_differences_split_by_dataset.png",
  plot = diff_in_search_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_exposed_variable_set"
)
## Saving 7 x 5 in image
Difference in oracle
# Average the predictive draws per cell and draw (pooling over search), then
# contrast the oracle levels draw by draw.
diff_in_oracle_prediction <- predictive_data_exposed_variable_set %>%
  group_by(oracle, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = oracle) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'oracle', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the oracle contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_oracle_prediction_plot <- diff_in_oracle_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_variable_set (",
      as.character(diff_in_oracle_prediction$oracle[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_oracle_prediction$task)))
diff_in_oracle_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "oracle_differences.png",
  plot = diff_in_oracle_prediction_plot,
  path = "../plots/comparisons/num_exposed_variable_set"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_oracle_prediction_plot_split_by_dataset <- diff_in_oracle_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_oracle_prediction_plot_split_by_dataset
ggsave(
  filename = "oracle_differences_split_by_dataset.png",
  plot = diff_in_oracle_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_exposed_variable_set"
)
## Saving 7 x 5 in image
Difference in groups
# Average the predictive draws per cell and draw (pooling over oracle), then
# contrast the participant groups draw by draw.
# NOTE(review): `search` stays in the grouping, so the group contrast is
# computed within each search level; confirm this is intended rather than
# carried over from the search contrast.
diff_in_group_prediction <- predictive_data_exposed_variable_set %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = participant_group) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the participant-group contrast per task.
# The contrast label is pulled out as a plain string: single-bracket indexing
# on a tibble returns a 1x1 tibble, and pasting that can show a factor's
# integer code instead of its label.
diff_in_group_prediction_plot <- diff_in_group_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_variable_set (",
      as.character(diff_in_group_prediction$participant_group[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  scale_y_discrete(limits = rev(levels(diff_in_group_prediction$task)))
diff_in_group_prediction_plot
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(
  filename = "group_differences.png",
  plot = diff_in_group_prediction_plot,
  path = "../plots/comparisons/pariticpant_groups/num_exposed_variable_set"
)
## Saving 7 x 5 in image
# Same plot filled by dataset. The identity scale uses the constant 0.5 as the
# actual opacity and keeps it out of the legend.
diff_in_group_prediction_plot_split_by_dataset <- diff_in_group_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()
diff_in_group_prediction_plot_split_by_dataset
ggsave(
  filename = "group_differences_split_by_dataset.png",
  plot = diff_in_group_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/pariticpant_groups/num_exposed_variable_set"
)
## Saving 7 x 5 in image
Read in and format data
# Read the CSV and convert every categorical predictor to a factor in one pass.
exposed_visual_design_data <-
  read.csv("split_by_participant_groups/num_of_exposed_visual_design.csv") %>%
  mutate(across(c(dataset, oracle, search, task, participant_group), as.factor))
Train model
# Mean and sd of the normal prior on the intercept; stanvars makes both values
# visible to the Stan program so the prior can refer to them by name.
prior_mean <- 35.24
prior_sd <- 25.33
stanvars <- stanvar(prior_mean, name = "prior_mean") +
  stanvar(prior_sd, name = "prior_sd")

# Mixed model with an oracle x search interaction, additive dataset / task /
# participant-group effects and a random intercept per participant.
# NOTE(review): this `file` name starts with an underscore, unlike the other
# three model files; left unchanged so an existing saved fit is still found.
model_exposed_visual_design <- brm(
  formula = num_exposed_visual_design ~
    oracle * search + dataset + task + participant_group + (1 | participant_id),
  data = exposed_visual_design_data,
  prior = prior(normal(prior_mean, prior_sd), class = Intercept),
  stanvars = stanvars,
  chains = 2,
  cores = 2,
  iter = 2500,
  warmup = 1000,
  seed = seed,
  file = "models/_exposed_visual_design_group"
)
Plot
# Default plot of the fitted model, used as a visual check of the sampled parameters.
plot(model_exposed_visual_design)
Summary
# Print the model summary; the console output follows in the `##` lines.
summary(model_exposed_visual_design)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: num_exposed_visual_design ~ oracle * search + dataset + task + participant_group + (1 | participant_id)
## Data: exposed_visual_design_data (Number of observations: 132)
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
## total post-warmup samples = 3000
##
## Group-Level Effects:
## ~participant_id (Number of levels: 66)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 14.06 5.05 2.33 22.69 1.00 505 670
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 61.87 8.28 44.95 77.58 1.00 1694
## oracledziban 7.37 8.71 -9.44 24.84 1.00 1379
## searchdfs -30.65 8.55 -47.39 -13.14 1.00 1373
## datasetmovies 14.24 6.29 1.06 26.39 1.00 1669
## task4.Exploration 15.67 5.00 5.96 25.40 1.00 3565
## participant_groupstudent 5.91 6.28 -6.30 18.06 1.00 2131
## oracledziban:searchdfs 31.43 12.35 6.88 55.92 1.00 1259
## Tail_ESS
## Intercept 2080
## oracledziban 1715
## searchdfs 1852
## datasetmovies 1511
## task4.Exploration 2089
## participant_groupstudent 1996
## oracledziban:searchdfs 1385
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 28.26 2.50 23.64 33.36 1.00 858 1518
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Pairwise plots of the main population-level coefficients
# (the oracle:search interaction term is not included).
pairs(
  model_exposed_visual_design,
  pars = paste0(
    "b_",
    c(
      "Intercept", "datasetmovies", "oracledziban",
      "searchdfs", "task4.Exploration", "participant_groupstudent"
    )
  ),
  fixed = TRUE
)
# Fitted draws for every observed row; re_formula = NA leaves the
# participant-level random intercept out of the fitted values.
draw_data_exposed_visual_design <- exposed_visual_design_data %>%
  add_fitted_draws(model_exposed_visual_design, seed = seed, re_formula = NA)
# One facet label per oracle/search combination.
draw_data_exposed_visual_design$condition <- paste(
  draw_data_exposed_visual_design$oracle,
  draw_data_exposed_visual_design$search
)
# alpha is a constant, so it is set on the layer. Inside aes() it was treated
# as a data mapping, which added a meaningless legend entry.
plot_exposed_visual_design <- draw_data_exposed_visual_design %>%
  ggplot(aes(x = dataset, y = .value, fill = participant_group)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ condition)
plot_exposed_visual_design
# `filename` is spelled out instead of relying on partial matching of `file`.
# NOTE(review): "pariticpant_groups" looks misspelled but is kept because the
# existing directory layout evidently uses it.
ggsave(
  filename = "exposed_visual_design_split_group.png",
  plot = plot_exposed_visual_design,
  path = "../plots/posterior_draws/pariticpant_groups/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Mean plus 95% and 50% quantile intervals of the fitted draws for each
# search x oracle x task x participant-group cell.
fit_info_exposed_visual_design <- draw_data_exposed_visual_design %>%
  group_by(search, oracle, task, participant_group) %>%
  mean_qi(.value, .width = c(.95, .5))
fit_info_exposed_visual_design
## # A tibble: 32 x 10
## # Groups: search, oracle, task [8]
## search oracle task participant_gro… .value .lower .upper .width .point
## <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 bfs compa… 3. P… professional 69.0 48.1 89.3 0.95 mean
## 2 bfs compa… 3. P… student 74.9 55.1 95.1 0.95 mean
## 3 bfs compa… 4. E… professional 84.7 63.1 105. 0.95 mean
## 4 bfs compa… 4. E… student 90.6 70.2 111. 0.95 mean
## 5 bfs dziban 3. P… professional 77.4 56.1 97.4 0.95 mean
## 6 bfs dziban 3. P… student 82.3 62.5 102. 0.95 mean
## 7 bfs dziban 4. E… professional 93.0 71.9 113. 0.95 mean
## 8 bfs dziban 4. E… student 97.9 78.4 118. 0.95 mean
## 9 dfs compa… 3. P… professional 39.3 18.7 58.6 0.95 mean
## 10 dfs compa… 3. P… student 44.2 24.5 63.8 0.95 mean
## # … with 22 more rows, and 1 more variable: .interval <chr>
# Save the interval summary as CSV.
# NOTE(review): unlike the plot above, this path has no participant-group
# subfolder; confirm it does not overwrite output from another analysis.
write.csv(
  fit_info_exposed_visual_design,
  file = "../plot_data/posterior_draws/num_exposed_visual_design/exposed_visual_design.csv",
  row.names = FALSE
)
# Posterior predictive draws for every observed row (re_formula = NA, as for
# the fitted draws).
predictive_data_exposed_visual_design <- add_predicted_draws(
  exposed_visual_design_data,
  model_exposed_visual_design,
  seed = seed,
  re_formula = NA
)
Difference in search
# Average the predictive draws per cell and draw (pooling over oracle), then
# contrast the search levels draw by draw.
diff_in_search_prediction <- predictive_data_exposed_visual_design %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  # Plain mean: the earlier weighted.mean() call never supplied weights.
  # "drop_last" is the regrouping summarise() applied implicitly, now explicit.
  summarize(value = mean(.prediction), .groups = "drop_last") %>%
  compare_levels(value, by = search) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the search contrast, one row per task, with 95% and 50%
# intervals and a dashed reference line at zero difference.
diff_in_search_prediction_plot <- diff_in_search_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_visual_design (",
      # compare_levels() stores the contrast name in the `search` column.
      # Extract it as a character scalar: `[1, 'search']` yields a 1 x 1
      # tibble, and paste0() on that prints the integer code when the
      # column is a factor.
      as.character(diff_in_search_prediction[["search"]][1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the level order so the first task level is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_search_prediction$task)))
diff_in_search_prediction_plot
# `filename` is spelled out; `file` only worked through partial matching.
ggsave(
  filename = "search_differences.png",
  plot = diff_in_search_prediction_plot,
  path = "../plots/comparisons/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Same search-contrast plot with the half-eyes filled by dataset. The constant
# alpha is mapped inside aes(), so ggplot2 scales it like data and would draw
# a meaningless "alpha" legend key; that guide is hidden while the fill legend
# is kept.
diff_in_search_prediction_plot_split_by_dataset <-
  diff_in_search_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  guides(alpha = "none")
diff_in_search_prediction_plot_split_by_dataset
# `filename` is spelled out; `file` only worked through partial matching.
ggsave(
  filename = "search_differences_split_by_dataset.png",
  plot = diff_in_search_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_exposed_visual_design"
)
## Saving 7 x 5 in image
Difference in oracle
# Average the posterior predictions within each draw for every
# oracle x task x dataset x participant_group cell (weighted.mean() is given
# no weights, so this is the ordinary mean), then contrast the oracle levels
# draw by draw. The contrast is stored in `diff`.
diff_in_oracle_prediction <-
  predictive_data_exposed_visual_design %>%
  group_by(oracle, task, dataset, participant_group, .draw) %>%
  summarise(value = weighted.mean(.prediction)) %>%
  compare_levels(value, by = oracle) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'oracle', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the oracle contrast, one row per task, with 95% and 50%
# intervals and a dashed reference line at zero difference.
diff_in_oracle_prediction_plot <- diff_in_oracle_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_visual_design (",
      # compare_levels() stores the contrast name in the `oracle` column.
      # Extract it as a character scalar: `[1, 'oracle']` yields a 1 x 1
      # tibble, and paste0() on that prints the integer code when the
      # column is a factor.
      as.character(diff_in_oracle_prediction[["oracle"]][1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the level order so the first task level is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_oracle_prediction$task)))
diff_in_oracle_prediction_plot
# `filename` is spelled out (`file` only worked through partial matching) and
# the path is a plain literal instead of a single-argument paste0().
ggsave(
  filename = "oracle_differences.png",
  plot = diff_in_oracle_prediction_plot,
  path = "../plots/comparisons/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Same oracle-contrast plot with the half-eyes filled by dataset. The constant
# alpha is mapped inside aes(), so ggplot2 scales it like data and would draw
# a meaningless "alpha" legend key; that guide is hidden while the fill legend
# is kept.
diff_in_oracle_prediction_plot_split_by_dataset <-
  diff_in_oracle_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  guides(alpha = "none")
diff_in_oracle_prediction_plot_split_by_dataset
# `filename` is spelled out (`file` only worked through partial matching) and
# the path is a plain literal instead of a single-argument paste0().
ggsave(
  filename = "oracle_differences_split_by_dataset.png",
  plot = diff_in_oracle_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_exposed_visual_design"
)
## Saving 7 x 5 in image
Difference in groups
# Average the posterior predictions within each draw for every
# search x task x dataset x participant_group cell (weighted.mean() is given
# no weights, so this is the ordinary mean), then contrast the participant
# groups draw by draw. The contrast is stored in `diff`.
diff_in_group_prediction <-
  predictive_data_exposed_visual_design %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarise(value = weighted.mean(.prediction)) %>%
  compare_levels(value, by = participant_group) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the participant-group contrast, one row per task, with 95%
# and 50% intervals and a dashed reference line at zero difference.
diff_in_group_prediction_plot <- diff_in_group_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_visual_design (",
      # compare_levels() stores the contrast name in the `participant_group`
      # column. Extract it as a character scalar: `[1, 'participant_group']`
      # yields a 1 x 1 tibble, and paste0() on that prints the integer code
      # when the column is a factor.
      as.character(diff_in_group_prediction[["participant_group"]][1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the level order so the first task level is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_group_prediction$task)))
diff_in_group_prediction_plot
# `filename` is spelled out (`file` only worked through partial matching) and
# the path is a plain literal instead of a single-argument paste0().
ggsave(
  filename = "group_differences.png",
  plot = diff_in_group_prediction_plot,
  path = "../plots/comparisons/pariticpant_groups/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Same participant-group contrast plot with the half-eyes filled by dataset.
# The constant alpha is mapped inside aes(), so ggplot2 scales it like data
# and would draw a meaningless "alpha" legend key; that guide is hidden while
# the fill legend is kept.
diff_in_group_prediction_plot_split_by_dataset <-
  diff_in_group_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  guides(alpha = "none")
diff_in_group_prediction_plot_split_by_dataset
# `filename` is spelled out; `file` only worked through partial matching.
ggsave(
  filename = "group_differences_split_by_dataset.png",
  plot = diff_in_group_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/pariticpant_groups/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Tag each set of posterior draws with its source, stack them, and draw the
# two distributions in one figure: oracle on the x axis, fill by source,
# faceted by task (rows) and search (columns).
draw_data_exposed_variable_set$category <- "exposed"
draw_data_interacted_var_sets$category <- "interacted"
data_conbined <- rbind(draw_data_exposed_variable_set, draw_data_interacted_var_sets)
plot_var_set <- data_conbined %>%
  ggplot(aes(x = oracle, y = .value, fill = category)) +
  # Transparency is a fixed setting, not data, so it is set on the layer.
  # Mapping the constant inside aes() sends it through the alpha scale and
  # adds a meaningless "alpha" legend key.
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ search) +
  ylab("Number of Elements")
plot_var_set
# `filename` is spelled out; `file` only worked through partial matching.
ggsave(
  filename = "interaction_var_sets.png",
  plot = plot_var_set,
  path = "../plots/posterior_draws"
)
## Saving 7 x 5 in image
# Tag each set of posterior draws with its source, stack them, and draw the
# two distributions in one figure: oracle on the x axis, fill by source,
# faceted by task (rows) and search (columns).
draw_data_exposed_visual_design$category <- "exposed"
draw_data_interacted_visual_design$category <- "interacted"
data_conbined <- rbind(draw_data_exposed_visual_design, draw_data_interacted_visual_design)
plot_vis_design <- data_conbined %>%
  ggplot(aes(x = oracle, y = .value, fill = category)) +
  # Transparency is a fixed setting, not data, so it is set on the layer.
  # Mapping the constant inside aes() sends it through the alpha scale and
  # adds a meaningless "alpha" legend key.
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ search) +
  ylab("Number of Elements")
plot_vis_design
# `filename` is spelled out; `file` only worked through partial matching.
ggsave(
  filename = "interaction_vis_design.png",
  plot = plot_vis_design,
  path = "../plots/posterior_draws"
)
## Saving 7 x 5 in image